library(survey)
library(foreign)
library(ggplot2)
library(cowplot)
library(dplyr)
library(egg)
library(ggpubr)
library(gridGraphics)
library(wrapr)
library(list)

## see wave2_CLEANING.R for data cleaning
load('~/Desktop/ZB - Confidence Book/Technical Appendix/Data Files/clean_dataw2.RData')

# Column Cleans -----------------------------------------------------------

df <- as.data.frame(df)

df <- df[c(1,2,15,21,22,61,62, 156,178,188,190,192,194,198,212)]

colnames(df) <- c("CaseID", "Weight2", "PARTYID7", "Q1C", "Q1T",
                     "Q11", "Q12", "Q39", "VETERAN", "GENDER", "AGE4",
                     "RACE", "EDUC5", "REGION4", "IDEO7")
                     
df$LIST <- NA
df$LIST[df$Q1C < 7] <- 0
df$LIST[df$Q1T < 7] <- 1
df <- df[!is.na(df$LIST),]

df$Q1 <- rowSums(df[, c("Q1C", "Q1T")], na.rm=TRUE)
df$Q1[df$Q1 == 98] <- NA
df <- df[!is.na(df$Q1),]

df$Q11T <- 0
df$Q11T[df$Q11 < 3] <- 1
df$Q11T[df$Q11 > 4] <- NA
df <- df[!is.na(df$Q11T),]

df$Q12T <- 0
df$Q12T[df$Q12 == 1] <- 1
df$Q12T[df$Q12 > 76] <- NA
df <- df[!is.na(df$Q12T),]


## Create weighted survey design object ----------------------------------
w2_design <-
  svydesign(
    id = ~ 1,
    weights = ~ Weight2,
    data = df
  )

addline_format <- function(x,...){
  gsub('\\s','\n',x)
}

###Table 7.1
fig71_means <- svyby(~ Q1, ~ LIST, w2_design, svymean, na.rm = TRUE)
fig71_means


# Record Essential Demos --------------------------------------------------

df$VET <- ifelse(df$VETERAN == 1, 1, 0)
df <- df[!is.na(df$VET),]

df$REP <- ifelse(df$PARTYID7 > 4, 1, 0)
df <- df[!is.na(df$REP),]

df$DEM <- ifelse(df$PARTYID7 < 4, 1, 0)

df$PID3 <- NA
df$PID3[df$PARTYID7 < 4] <- 1
df$PID3[df$PARTYID7 == 4] <- 2
df$PID3[df$PARTYID7 > 4] <- 3

df$pid3 <- NA
df$pid3[df$PARTYID7 < 3] <- 1
df$pid3[df$PARTYID7 == 3] <- 2
df$pid3[df$PARTYID7 == 4] <- 2
df$pid3[df$PARTYID7 == 5] <- 2
df$pid3[df$PARTYID7 > 5] <- 3


df <- df[!is.na(df$DEM),]

df$Q39T <- NA
df$Q39T[df$Q39 < 77] <- 0
df$Q39T[df$Q39 == 1] <- 1
df <- df[!is.na(df$Q39T),]

df$IDEO3 <- NA
df$IDEO3[df$IDEO7 < 4] <- 0
df$IDEO3[df$IDEO7 == 4] <- 1
df$IDEO3[df$IDEO7 > 4] <- 2
df$IDEO3[df$IDEO7 > 7] <- NA
df <- df[!is.na(df$IDEO3),]

df$AGE <- df$AGE4 - 1

df$WOMAN <- ifelse(df$GENDER == 2, 1, 0)

df$WHITE <- 0
df$WHITE[df$RACE > 76] <- NA
df$BLACK[df$RACE == 1] <- 1
df <- df[!is.na(df$WHITE),]

df$BLACK <- 0
df$BLACK[df$RACE > 76] <- NA
df$BLACK[df$RACE == 2] <- 1
df$BLACK[df$RACE == 5] <- 1
df <- df[!is.na(df$BLACK),]

df$HISPANIC <- 0
df$HISPANIC[df$RACE > 76] <- NA
df$HISPANIC[df$RACE == 4] <- 1
df <- df[!is.na(df$HISPANIC),]

df$ASIAN <- 0
df$ASIAN[df$RACE > 76] <- NA
df$ASIAN[df$RACE == 6] <- 1
df <- df[!is.na(df$ASIAN),]

df$HS <- 0
df$HS[df$EDUC5 > 76] <- NA
df$HS[df$EDUC5 < 3] <- 1
df <- df[!is.na(df$HS),]

df$GRAD <- 0
df$GRAD[df$EDUC5 > 76] <- NA
df$GRAD[df$EDUC5 == 5] <- 1
df <- df[!is.na(df$GRAD),]

df$COLLEGE <- 0
df$COLLEGE[df$EDUC5 > 76] <- NA
df$COLLEGE[df$EDUC5 == 3] <- 1
df$COLLEGE[df$EDUC5 == 4] <- 1
df <- df[!is.na(df$COLLEGE),]

df$NORTHEAST <- 0
df$NORTHEAST[df$REGION4 > 76] <- NA
df$NORTHEAST[df$REGION4 == 1] <- 1
df <- df[!is.na(df$NORTHEAST),]

df$SOUTH <- 0
df$SOUTH[df$REGION4 > 76] <- NA
df$SOUTH[df$REGION4 == 3] <- 1
df <- df[!is.na(df$SOUTH),]

df$WEST <- 0
df$WEST[df$REGION4 > 76] <- NA
df$WEST[df$REGION4 == 4] <- 1
df <- df[!is.na(df$WEST),]


# Regression Models -------------------------------------------
### ICTREG
ml.results.full <- ictreg(Q1 ~ VET + Q39T + REP + DEM + IDEO3 + AGE + WOMAN + 
						BLACK + HISPANIC + ASIAN + COLLEGE + GRAD + NORTHEAST + SOUTH + WEST, 
						data = df, treat = "LIST", J=5, weights = "Weight2", method = "ml", fit.start = "glm",
						overdispersed = FALSE, constrained = TRUE) 
summary(ml.results.full)

###Logit on the Direct Question
logitmodel <- glm(Q12T ~ VET + Q39T + REP + DEM + IDEO3 + AGE + WOMAN + BLACK + HISPANIC + ASIAN + COLLEGE + GRAD + NORTHEAST + SOUTH + WEST,family=binomial(link='logit'),data=df, weights = Weight2)
summary(logitmodel)

###Logit on the Standard Confidence Question
logitmodel2 <- glm(Q11T ~ VET + Q39T + REP + DEM + IDEO3 + AGE + WOMAN + BLACK + HISPANIC + ASIAN + COLLEGE + GRAD + NORTHEAST + SOUTH + WEST,family=binomial(link='logit'),data=df, weights = Weight2)
summary(logitmodel2)

ml.full <- predict(ml.results.full, newdata=df, se.fit=TRUE, avg=TRUE)	
ml.full.fit <- mean(ml.full$fit)
ml.full.se <- mean(ml.full$se)

logit.full <- predict(logitmodel, newdata=df, se.fit=TRUE, avg=TRUE, type=c("response"))
logit.full.fit <- mean(logit.full$fit)
logit.full.se <- mean(logit.full$se)

logit.full2 <- predict(logitmodel2, newdata=df, se.fit=TRUE, avg=TRUE, type=c("response"))
logit.full2.fit <- mean(logit.full2$fit)
logit.full2.se <- mean(logit.full2$se)					

# Graphical Depiction of Models -------------------------------------------
dfCIV <- dfVET <- dfNOFAM <- dfFAM <- dfREP <- dfDEM <- dfIND <- dfLIB <- dfMOD <- dfCON <- df1829 <- df3044 <- df4559 <- df60 <- dfMAN <- dfWOMAN <- dfWHITE <- dfBLACK <- dfHISPANIC <- dfASIAN <- dfHS <- dfCOLLEGE <- dfGRAD <- dfNORTHEAST <- dfSOUTH <- dfWEST <- dfMIDW <- df

dfCIV$VET <- 0
dfVET$VET <- 1
dfNOFAM$Q39T <- 0
dfFAM$Q39T <- 1
dfREP[, "REP"] <- 1
dfREP[, "DEM"] <- 0
dfDEM[, "DEM"] <- 1
dfDEM[, "REP"] <- 0
dfIND[, "REP"] <- 0
dfIND[, "DEM"] <- 0
dfLIB[, "IDEO3"] <- 0
dfMOD[, "IDEO3"] <- 1
dfCON[, "IDEO3"] <- 2
df1829[, "AGE"] <- 0
df3044[, "AGE"] <- 1
df4559[, "AGE"] <- 2
df60[, "AGE"] <- 3
dfMAN[, "WOMAN"] <- 0
dfWOMAN[, "WOMAN"] <- 1
dfWHITE[, "BLACK"] <- 0
dfWHITE[, "HISPANIC"] <- 0
dfWHITE[, "ASIAN"] <- 0
dfBLACK[, "BLACK"] <- 1
dfHISPANIC[, "HISPANIC"] <- 1
dfASIAN[, "ASIAN"] <- 1
dfHS[, "COLLEGE"] <- 0
dfHS[, "GRAD"] <- 0
dfCOLLEGE[, "COLLEGE"] <- 1
dfGRAD[, "GRAD"] <- 1
dfNORTHEAST[, "NORTHEAST"] <- 1
dfSOUTH[, "SOUTH"] <- 1
dfWEST[, "WEST"] <- 1
dfMIDW[, "NORTHEAST"] <- 0
dfMIDW[, "SOUTH"] <- 0
dfMIDW[, "WEST"] <- 0

###MLE Predictions
mlCIV <- predict(ml.results.full, newdata = dfCIV,
                             se.fit = TRUE, avg = TRUE)
                             
mlVET <- predict(ml.results.full, newdata = dfVET,
                             se.fit = TRUE, avg = TRUE)
                             
mlNOFAM <- predict(ml.results.full, newdata = dfNOFAM,
                             se.fit = TRUE, avg = TRUE)  
                             
mlFAM <- predict(ml.results.full, newdata = dfFAM,
                             se.fit = TRUE, avg = TRUE)                             

mlREP <- predict(ml.results.full, newdata = dfREP,
                             se.fit = TRUE, avg = TRUE) 
                             
mlDEM <- predict(ml.results.full, newdata = dfDEM,
                             se.fit = TRUE, avg = TRUE)
                             
mlIND <- predict(ml.results.full, newdata = dfIND,
                             se.fit = TRUE, avg = TRUE)
                             
mlLIB <- predict(ml.results.full, newdata = dfLIB,
                             se.fit = TRUE, avg = TRUE) 

mlMOD <- predict(ml.results.full, newdata = dfMOD,
                             se.fit = TRUE, avg = TRUE)
                                                          
mlCON <- predict(ml.results.full, newdata = dfCON,
                             se.fit = TRUE, avg = TRUE)
                             
ml1829 <- predict(ml.results.full, newdata = df1829,
                             se.fit = TRUE, avg = TRUE)
                             
ml3044 <- predict(ml.results.full, newdata = df3044,
                             se.fit = TRUE, avg = TRUE)
                             
ml4559 <- predict(ml.results.full, newdata = df4559,
                             se.fit = TRUE, avg = TRUE)
                             
ml60 <- predict(ml.results.full, newdata = df60,
                             se.fit = TRUE, avg = TRUE)                                                                                                                    
                             
mlMAN <- predict(ml.results.full, newdata = dfMAN,
                             se.fit = TRUE, avg = TRUE)
                             
mlWOMAN <- predict(ml.results.full, newdata = dfWOMAN,
                             se.fit = TRUE, avg = TRUE)                                                         

mlWHITE <- predict(ml.results.full, newdata = dfWHITE,
                             se.fit = TRUE, avg = TRUE)
                             
mlBLACK <- predict(ml.results.full, newdata = dfBLACK,
                             se.fit = TRUE, avg = TRUE)
                             
mlHISPANIC <- predict(ml.results.full, newdata = dfHISPANIC,
                             se.fit = TRUE, avg = TRUE)
                             
mlASIAN <- predict(ml.results.full, newdata = dfASIAN,
                             se.fit = TRUE, avg = TRUE)                                                                                        

mlHS <- predict(ml.results.full, newdata = dfHS,
                             se.fit = TRUE, avg = TRUE)            

mlCOLLEGE <- predict(ml.results.full, newdata = dfCOLLEGE,
                             se.fit = TRUE, avg = TRUE)

mlGRAD <- predict(ml.results.full, newdata = dfGRAD,
                             se.fit = TRUE, avg = TRUE)

mlNORTHEAST <- predict(ml.results.full, newdata = dfNORTHEAST,
                             se.fit = TRUE, avg = TRUE)                                                                                    
                          
mlSOUTH <- predict(ml.results.full, newdata = dfSOUTH,
                             se.fit = TRUE, avg = TRUE)                                                                   
						
mlWEST <- predict(ml.results.full, newdata = dfWEST,
                             se.fit = TRUE, avg = TRUE)
                             					
mlMIDWEST <- predict(ml.results.full, newdata = dfMIDW,
                             se.fit = TRUE, avg = TRUE)
                             						
###Logit Predictions
logitCIV <- predict(logitmodel, newdata = dfCIV,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitVET <- predict(logitmodel, newdata = dfVET,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitNOFAM <- predict(logitmodel, newdata = dfNOFAM,
                             se.fit = TRUE, avg = TRUE, type=c("response"))  
                             
logitFAM <- predict(logitmodel, newdata = dfFAM,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                             

logitREP <- predict(logitmodel, newdata = dfREP,
                             se.fit = TRUE, avg = TRUE, type=c("response")) 
                             
logitDEM <- predict(logitmodel, newdata = dfDEM,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitIND <- predict(logitmodel, newdata = dfIND,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitLIB <- predict(logitmodel, newdata = dfLIB,
                             se.fit = TRUE, avg = TRUE, type=c("response")) 

logitMOD <- predict(logitmodel, newdata = dfMOD,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                                                          
logitCON <- predict(logitmodel, newdata = dfCON,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logit1829 <- predict(logitmodel, newdata = df1829,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logit3044 <- predict(logitmodel, newdata = df3044,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logit4559 <- predict(logitmodel, newdata = df4559,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logit60 <- predict(logitmodel, newdata = df60,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                                                                                                                    
                             
logitMAN <- predict(logitmodel, newdata = dfMAN,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitWOMAN <- predict(logitmodel, newdata = dfWOMAN,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                                                         

logitWHITE <- predict(logitmodel, newdata = dfWHITE,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitBLACK <- predict(logitmodel, newdata = dfBLACK,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitHISPANIC <- predict(logitmodel, newdata = dfHISPANIC,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             
logitASIAN <- predict(logitmodel, newdata = dfASIAN,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                                                                                        

logitHS <- predict(logitmodel, newdata = dfHS,
                             se.fit = TRUE, avg = TRUE, type=c("response"))            

logitCOLLEGE <- predict(logitmodel, newdata = dfCOLLEGE,
                             se.fit = TRUE, avg = TRUE, type=c("response"))

logitGRAD <- predict(logitmodel, newdata = dfGRAD,
                             se.fit = TRUE, avg = TRUE, type=c("response"))

logitNORTHEAST <- predict(logitmodel, newdata = dfNORTHEAST,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                                                                                    
                          
logitSOUTH <- predict(logitmodel, newdata = dfSOUTH,
                             se.fit = TRUE, avg = TRUE, type=c("response"))                                                                   
						
logitWEST <- predict(logitmodel, newdata = dfWEST,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                             					
logitMIDWEST <- predict(logitmodel, newdata = dfMIDW,
                             se.fit = TRUE, avg = TRUE, type=c("response"))
                            

####Graphing

predicted_values <- matrix(ncol = 4, nrow = 28, NA)
predicted_values <- as.data.frame(predicted_values)
colnames(predicted_values) <- c("Est", "SE", "Est2", "SE2")

predicted_values[1,] <- c(mean(logitCIV$fit), mean(logitCIV$se.fit), mlCIV$fit, mlCIV$se.fit)
predicted_values[2,] <- c(mean(logitVET$fit), mean(logitVET$se.fit), mlVET$fit, mlVET$se.fit)
predicted_values[3,] <- c(mean(logitNOFAM$fit), mean(logitNOFAM$se.fit), mlNOFAM$fit, mlNOFAM$se.fit)
predicted_values[4,] <- c(mean(logitFAM$fit), mean(logitFAM$se.fit), mlFAM$fit, mlFAM$se.fit)
predicted_values[5,] <- c(mean(logitREP$fit), mean(logitREP$se.fit), mlREP$fit, mlREP$se.fit)
predicted_values[6,] <- c(mean(logitDEM$fit), mean(logitDEM$se.fit), mlDEM$fit, mlDEM$se.fit)
predicted_values[7,] <- c(mean(logitIND$fit), mean(logitIND$se.fit), mlIND$fit, mlIND$se.fit)
predicted_values[8,] <- c(mean(logitLIB$fit), mean(logitLIB$se.fit), mlLIB$fit, mlLIB$se.fit)
predicted_values[9,] <- c(mean(logitMOD$fit), mean(logitMOD$se.fit), mlMOD$fit, mlMOD$se.fit)
predicted_values[10,] <- c(mean(logitCON$fit), mean(logitCON$se.fit), mlCON$fit, mlCON$se.fit)
predicted_values[11,] <- c(mean(logit1829$fit), mean(logit1829$se.fit), ml1829$fit, ml1829$se.fit)
predicted_values[12,] <- c(mean(logit3044$fit), mean(logit3044$se.fit), ml3044$fit, ml3044$se.fit)
predicted_values[13,] <- c(mean(logit4559$fit), mean(logit4559$se.fit), ml4559$fit, ml4559$se.fit)
predicted_values[14,] <- c(mean(logit60$fit), mean(logit60$se.fit), ml60$fit, ml60$se.fit)
predicted_values[15,] <- c(mean(logitMAN$fit), mean(logitMAN$se.fit), mlMAN$fit, mlMAN$se.fit)
predicted_values[16,] <- c(mean(logitWOMAN$fit), mean(logitWOMAN$se.fit), mlWOMAN$fit, mlWOMAN$se.fit)
predicted_values[17,] <- c(mean(logitWHITE$fit), mean(logitWHITE$se.fit), mlWHITE$fit, mlWHITE$se.fit)
predicted_values[18,] <- c(mean(logitBLACK$fit), mean(logitBLACK$se.fit), mlBLACK$fit, mlBLACK$se.fit)
predicted_values[19,] <- c(mean(logitHISPANIC$fit), mean(logitHISPANIC$se.fit), mlHISPANIC$fit, mlHISPANIC$se.fit)
predicted_values[20,] <- c(mean(logitASIAN$fit), mean(logitASIAN$se.fit), mlASIAN$fit, mlASIAN$se.fit)
predicted_values[21,] <- c(mean(logitHS$fit), mean(logitHS$se.fit), mlHS$fit, mlHS$se.fit)
predicted_values[22,] <- c(mean(logitCOLLEGE$fit), mean(logitCOLLEGE$se.fit), mlCOLLEGE$fit, mlCOLLEGE$se.fit)
predicted_values[23,] <- c(mean(logitGRAD$fit), mean(logitGRAD$se.fit), mlGRAD$fit, mlGRAD$se.fit)
predicted_values[24,] <- c(mean(logitNORTHEAST$fit), mean(logitNORTHEAST$se.fit), mlNORTHEAST$fit, mlNORTHEAST$se.fit)
predicted_values[25,] <- c(mean(logitSOUTH$fit), mean(logitSOUTH$se.fit), mlSOUTH$fit, mlSOUTH$se.fit)
predicted_values[26,] <- c(mean(logitWEST$fit), mean(logitWEST$se.fit), mlWEST$fit, mlWEST$se.fit)
predicted_values[27,] <- c(mean(logitMIDWEST$fit), mean(logitMIDWEST$se.fit), mlMIDWEST$fit, mlMIDWEST$se.fit)
predicted_values[28,] <- c(logit.full.fit, logit.full.se, ml.full.fit, ml.full.se)


predicted_values$Est <- as.numeric(predicted_values$Est)
predicted_values$SE <- as.numeric(predicted_values$SE)
predicted_values$Dif <- NA
predicted_values$Dif <- as.numeric(predicted_values$Est - predicted_values$Est2)
predicted_values$Q1L <- NA
predicted_values$Q1L <- length(df$Q1)
predicted_values$Q12TL <- NA
predicted_values$Q12TL <- length(df$Q12T)
predicted_values$TL <- NA
predicted_values$TL <- length(predicted_values$Dif)
predicted_values$SE3 <- NA
predicted_values$SE3 <- as.numeric(sqrt((predicted_values$SE2*predicted_values$SE2) +
					(predicted_values$SE*predicted_values$SE)))

					
predicted_values$Variable <- NA
predicted_values$Variable <- paste(c("Civilian", "Veteran", "No Mil in Family", "Military in Family", "Republican", "Democrat", "Independent", "Liberal", "Moderate", "Conservative", "Age 18-29", "Age 30-44", "Age 45-60", "Age 60+", "Man", "Woman", "White", "Black", "Hispanic", "Asian", "High School", "College", "Grad. Degree", "Northeast", "South", "West", "Midwest", "Average"))
predicted_values

theme_set(theme_bw())
  
fig73 <- ggplot(predicted_values, aes(x=reorder(Variable,-Dif), y=Dif)) +
  geom_pointrange(
    aes(ymin = Dif-(1.96*SE3), ymax = Dif+(1.96*SE3)),
    position = position_dodge(0.3)) +
  ylim(-.15,1) +
  ylab(c("Estimated Social Desirability Effect By Subgroup")) +
  xlab(c("Demographic Subgroups")) +
  geom_hline(yintercept = 0, size=0.3, color="#000000") + geom_hline(yintercept = 0.2849678, size=0.25, color="#000000", linetype="dashed") +
  theme(axis.title = element_text(size=11, face="bold"), axis.title.x = element_text(size=11, face="bold"),
        axis.title.y = element_text(size=11, face="bold"), axis.text.x = element_text(angle=90, size=8), axis.text.y = element_text(size=8))
        
setwd('~/Desktop/ZB - Confidence Book/Technical Appendix/Figures/Chapter 7 Figures')

fig73
ggsave("Figure 7.3 Direct Question.png")
ggsave("Figure 7.3 Direct Question.pdf")


direct_est <- matrix(ncol = 4, nrow = 3, NA)
direct_est <- as.data.frame(direct_est)
colnames(direct_est) <- c("D.Est", "D.SE", "LB", "UB")

diff_dir <- logit.full.fit - ml.full.fit
diff_se <- as.numeric(sqrt((logit.full.se*logit.full.se) +
					(ml.full.se*ml.full.se)))

direct_est[1,] <- c(ml.full.fit, ml.full.se, (ml.full.fit + 1.96*ml.full.se), (ml.full.fit - 1.96*ml.full.se))
direct_est[3,] <- c(logit.full.fit, logit.full.se, (logit.full.fit + 1.96*ml.full.se), (logit.full.fit - 1.96*ml.full.se))
direct_est[2,] <- c(diff_dir, logit.full.se, (diff_dir + 1.96*diff_se), (diff_dir - 1.96*diff_se))

direct_est$Variable2 <- NA
direct_est$Variable2 <- paste(c("List Estimate", "Difference", "Direct Question"))

  
fig73a <- ggplot(direct_est, aes(x=reorder(Variable2,-D.Est), y=D.Est)) +
  geom_pointrange(
    aes(ymin = LB, ymax = UB),
    position = position_dodge(0.3)) +
  ylim(-.15,1) +
  ylab(c("Estimated Social Desirability Effect By Subgroup")) +
  xlab(c("Demographic Subgroups")) +
  geom_hline(yintercept = 0, size=0.3, color="#000000") + geom_hline(yintercept = 0.2849678, size=0.25, color="#000000", linetype="dashed") +
  theme(axis.title = element_text(size=11, face="bold"), axis.title.x = element_text(size=11, face="bold"),
        axis.title.y = element_text(size=11, face="bold"), axis.text.x = element_text(angle=90, size=8), axis.text.y = element_text(size=8))
        
fig73a
ggsave("Figure 7.3a Direct Question.png")
ggsave("Figure 7.3a Direct Question.pdf")